import sys,os
# Make the parent of this file's directory importable by appending its
# absolute path to the end of sys.path.
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__),'..'))
sys.path.append(project_root)
# NOTE(review): this entry is a relative path, so Python resolves it against
# the current working directory rather than this file's location — confirm
# that scripts importing this module are always launched from the expected
# directory.
sys.path.append(r'../witin_nn/nn')

import torch
import witin_nn
import torch.nn as nn
from witin_nn.interface import ConfigFactory
from witin_nn import HandleNegInType
from utils_get_fixed_point_model import get_x_weight_scale


class ResidualBlock(nn.Module):
    """Two-convolution residual block built from witin_nn layers.

    The main path is two ``WitinConv2d -> WitinBatchNorm2d -> WitinGELU``
    stages (``left1`` followed by ``left2``). The shortcut is an empty
    ``nn.Sequential`` (which returns its input unchanged) unless
    ``stride != 1`` or the channel count changes, in which case it is a 1x1
    conv/BN/GELU projection. Both paths are combined by ``WitinElementAdd``
    and passed through a final ``WitinGELU``.

    Args:
        inchannel: Number of input channels.
        outchannel: Number of output channels of both paths.
        config: Layer config object handed, unchanged, to every witin_nn
            layer of the block (all layers share this one object).
        stride: Stride of the first 3x3 convolution and of the projection
            shortcut. Defaults to 1.
    """

    def __init__(self, inchannel, outchannel, config, stride=1):
        super().__init__()
        self.config = config

        # Sub-modules are registered in the order left1, left2, shortcut,
        # gelu, add; keep this order so parameter/state_dict ordering is
        # unchanged.
        self.left1 = self._conv_bn_gelu(
            inchannel, outchannel, kernel_size=3, stride=stride, padding=1
        )
        self.left2 = self._conv_bn_gelu(
            outchannel, outchannel, kernel_size=3, stride=1, padding=1
        )
        if stride != 1 or inchannel != outchannel:
            # Shapes of the two paths would differ, so project the input
            # with a strided 1x1 convolution.
            self.shortcut = self._conv_bn_gelu(
                inchannel, outchannel, kernel_size=1, stride=stride
            )
        else:
            self.shortcut = nn.Sequential()

        self.gelu = witin_nn.WitinGELU(layer_config=self.config)
        self.add = witin_nn.WitinElementAdd(layer_config=self.config)

    def _conv_bn_gelu(self, in_ch, out_ch, **conv_kwargs):
        """Return a bias-free conv -> batch-norm -> GELU stack using ``self.config``."""
        return nn.Sequential(
            witin_nn.WitinConv2d(in_ch, out_ch, bias=False, layer_config=self.config, **conv_kwargs),
            witin_nn.WitinBatchNorm2d(out_ch, layer_config=self.config),
            witin_nn.WitinGELU(layer_config=self.config),
        )

    def forward(self, x):
        """Run the block: ``gelu(add(left2(left1(x)), shortcut(x)))``.

        Args:
            x: Input passed to both the main path and the shortcut.

        Returns:
            Output of the final GELU.
        """
        main = self.left2(self.left1(x))
        # The shortcut is evaluated after the main path, as before.
        residual = self.shortcut(x)
        return self.gelu(self.add(main, residual))

    def quantize_inference(self, x):
        """Run the block with explicit input scaling around each stage.

        For each conv/BN/GELU stage the input is multiplied by the x scale
        returned by ``get_x_weight_scale`` for that stage's first layer (the
        convolution); the stage output is then divided by the x scale and the
        weight scale.

        Args:
            x: Input passed to both the main path and the shortcut.

        Returns:
            Output of the final GELU applied to the element-wise sum of the
            rescaled main-path and shortcut results.
        """
        x_s1, w_s1 = get_x_weight_scale(self.left1[0])
        out = self.left1(x * x_s1)
        out = out / x_s1 / w_s1

        x_s2, w_s2 = get_x_weight_scale(self.left2[0])
        out = self.left2(out * x_s2)
        out = out / x_s2 / w_s2

        if len(self.shortcut) == 0:
            # Identity shortcut has no conv to query: reuse the first stage's
            # x scale with a weight scale of 1 so the multiply/divide below
            # cancels out.
            x_s3, w_s3 = x_s1, 1
        else:
            x_s3, w_s3 = get_x_weight_scale(self.shortcut[0])
        shortcut = self.shortcut(x * x_s3)
        shortcut = shortcut / x_s3 / w_s3

        out = self.add(out, shortcut)
        return self.gelu(out)

class ResNet(nn.Module):
    """Three-stage ResNet-style classifier assembled from witin_nn layers.

    Layout: a 3->64 stem (conv/BN/GELU), three residual stages of two blocks
    each (64, 128 and 256 channels; the last two start with stride 2), a
    256->256 conv/BN/GELU with kernel 4 and stride 4, dropout (p=0.2) and a
    ``WitinLinear(1024, num_classes)`` head. No 512-channel fourth stage is
    built.

    The head's 1024 input features equal 256 channels x 2 x 2, so the network
    presumably expects 3x32x32 inputs — TODO confirm against the data loader.

    Six separate layer-config objects are created (``layer_config1`` ..
    ``layer_config6``): one for the stem, one per residual stage, one for
    ``conv2`` and one for ``fc``. Use ``config_layer`` to modify them.

    Args:
        ResidualBlock: Block class (or factory) called as
            ``block(in_channels, out_channels, layer_config, stride)``.
        num_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, ResidualBlock, num_classes=10):
        super().__init__()
        # NOTE(review): 'get_dafault_config' is kept exactly as originally
        # spelled; presumably it matches the witin_nn API name — confirm.
        self.config_global = ConfigFactory.GlobalConfigFactory.get_dafault_config()
        self.layer_config1 = self.define_config_layer()  # stem (conv1)
        self.layer_config2 = self.define_config_layer()  # layer1
        self.layer_config3 = self.define_config_layer()  # layer2
        self.layer_config4 = self.define_config_layer()  # layer3
        self.layer_config5 = self.define_config_layer()  # conv2
        self.layer_config6 = self.define_config_layer()  # fc
        # Running channel count consumed and updated by make_layer.
        self.inchannel = 64

        self.quantization_params = {}

        self.conv1 = nn.Sequential(
            witin_nn.WitinConv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False, layer_config=self.layer_config1),
            witin_nn.WitinBatchNorm2d(64, layer_config=self.layer_config1),
            witin_nn.WitinGELU(layer_config=self.layer_config1),
        )
        self.layer1 = self.make_layer(ResidualBlock, self.layer_config2, 64, 2, stride=1)
        self.layer2 = self.make_layer(ResidualBlock, self.layer_config3, 128, 2, stride=2)
        self.layer3 = self.make_layer(ResidualBlock, self.layer_config4, 256, 2, stride=2)
        self.conv2 = nn.Sequential(
            witin_nn.WitinConv2d(256, 256, kernel_size=4, stride=4, bias=False, layer_config=self.layer_config5),
            witin_nn.WitinBatchNorm2d(256, layer_config=self.layer_config5),
            witin_nn.WitinGELU(layer_config=self.layer_config5),
        )
        self.dropout = nn.Dropout(0.2)
        self.fc = witin_nn.WitinLinear(1024, num_classes, layer_config=self.layer_config6)

    def make_layer(self, block, layer_config, channels, num_blocks, stride):
        """Build one residual stage as an ``nn.Sequential`` of blocks.

        The first block uses ``stride``; the remaining ``num_blocks - 1``
        blocks use stride 1. ``self.inchannel`` is read as the input channel
        count of each block and set to ``channels`` after each block is
        created.

        Args:
            block: Callable invoked as
                ``block(in_channels, channels, layer_config, stride)``.
            layer_config: Config object passed to every block of the stage.
            channels: Output channel count of every block in the stage.
            num_blocks: Number of blocks in the stage.
            stride: Stride of the first block.

        Returns:
            ``nn.Sequential`` containing the created blocks in order.
        """
        block_strides = [stride] + [1] * (num_blocks - 1)
        blocks = []
        for block_stride in block_strides:
            blocks.append(block(self.inchannel, channels, layer_config, block_stride))
            self.inchannel = channels
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Compute the class scores for ``x``.

        Args:
            x: Input batch fed to the 3-channel stem convolution.

        Returns:
            Output of the final linear layer, one row per batch element.
        """
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.conv2(out)
        out = self.dropout(out)
        # Flatten everything except the batch dimension for the linear head.
        out = out.view(out.size(0), -1)
        return self.fc(out)

    def quantize_inference(self, x):
        """Run the network with explicit scaling around each stage.

        Each residual block is run through its own ``quantize_inference``.
        For ``conv2`` and ``fc`` the input is multiplied by the x scale from
        ``get_x_weight_scale`` and the output divided by the x and weight
        scales.

        Args:
            x: Input batch fed to the stem.

        Returns:
            Rescaled output of the final linear layer.
        """
        x_s1, w_s1 = get_x_weight_scale(self.conv1[0])
        # NOTE(review): unlike conv2 and fc below, the stem input is NOT
        # multiplied by x_s1 before the layer, yet the output is divided by
        # it. Presumably callers pass an already-scaled input — confirm
        # before changing.
        out = self.conv1(x)
        out = out / x_s1 / w_s1

        for stage in (self.layer1, self.layer2, self.layer3):
            for block in stage:
                out = block.quantize_inference(out)

        x_s2, w_s2 = get_x_weight_scale(self.conv2[0])
        out = self.conv2(out * x_s2)
        out = out / x_s2 / w_s2
        out = self.dropout(out)
        out = out.view(out.size(0), -1)

        x_s3, w_s3 = get_x_weight_scale(self.fc)
        out = self.fc(out * x_s3)
        out = out / x_s3 / w_s3
        return out

    def define_config_layer(self):
        """Create a fresh default layer config for one part of the network.

        Returns:
            A new config from ``LayerConfigFactory.get_default_config()``
            with ``use_quantization`` set to ``False`` and ``w_clip`` set to
            127.
        """
        config = ConfigFactory.LayerConfigFactory.get_default_config()
        config.use_quantization = False
        config.w_clip = 127
        return config

    def config_layer(self, config, use_quantization=False, scale_x=1, scale_y=1, scale_weight=1, bias_row_N=8, noise_level=0):
        """Update a layer config object in place.

        Besides copying the keyword arguments onto ``config``, this always
        sets ``x_quant_bit`` and ``y_quant_bit`` to 8, ``use_auto_scale`` to
        ``True`` and ``handle_neg_in`` to ``HandleNegInType.PN``.
        ``self.config_global`` is not touched.

        Args:
            config: Layer config object to modify (e.g. ``self.layer_config1``).
            use_quantization: Value stored in ``config.use_quantization``.
            scale_x: Value stored in ``config.scale_x``.
            scale_y: Value stored in ``config.scale_y``.
            scale_weight: Value stored in ``config.scale_weight``.
            bias_row_N: Value stored in ``config.bias_row_N``.
            noise_level: Value stored in ``config.noise_level``.

        Returns:
            None. ``config`` is mutated.
        """
        config.x_quant_bit = 8
        config.y_quant_bit = 8
        config.use_auto_scale = True
        config.use_quantization = use_quantization
        config.scale_x = scale_x
        config.scale_y = scale_y
        config.scale_weight = scale_weight
        config.bias_row_N = bias_row_N
        config.noise_level = noise_level
        config.handle_neg_in = HandleNegInType.PN


def ResNet18():
    """Build the ``ResNet`` defined in this module using ``ResidualBlock`` blocks.

    No other arguments are forwarded, so ``ResNet``'s default ``num_classes``
    applies.

    Returns:
        A newly constructed ``ResNet`` instance.
    """
    block_type = ResidualBlock
    model = ResNet(block_type)
    return model